#!/usr/bin/env python3
# -*- coding: utf-8; mode: Python; indent-tabs-mode: t -*-

# Copyright (C) 2012, 2013, 2017, 2021 - 2024 Olga Yakovleva <olga@rhvoice.org>
# Copyright (C) 2024  Mateo Cedillo <angelitomateocedillo@gmail.com>

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import collections
import sys
import os
import os.path
import shutil
import subprocess
import argparse
import codecs
import json
import re
import math
import struct
import weakref
import xml.etree.ElementTree as xml
from scipy import stats
import numpy
import uuid
from scipy.signal import firwin,lfilter,kaiserord
from scipy.io import wavfile
import pathlib
from tqdm import tqdm
import pyworld
import multiprocessing

# Matches the $ver='N'; assignment inside the HTS demo's scripts/Config.pm.
version_pattern=re.compile(r"(?m)^\s*\$ver\s*=\s*'(\d+)'\s*;\s*$")
# Locations of this script, the locally built RHVoice tools and the
# RHVoice language/voice data, all resolved relative to this file.
scriptdir=os.path.abspath(os.path.dirname(__file__))
RHVoice_bindir=os.path.normpath(os.path.join(scriptdir,"..","..","..","local","bin"))
datadir=os.path.normpath(os.path.join(scriptdir,"..","..","..","data"))
langdir=os.path.join(datadir,"languages")
voxdir=os.path.join(datadir,"voices")
# All commands operate on the current working directory (the HTS demo root).
workdir=os.path.abspath(".")
dataset="RHVoice"
# Each task subclass registers its own subcommand on this parser in register().
parser=argparse.ArgumentParser(description="A few helper commands augmenting the standard HTS demo script")
subparsers=parser.add_subparsers()

class task(object):
	"""Base class for the helper subcommands.

	Provides lazy access to the training settings (training.cfg) and to
	data derived from them: HTS analysis parameters, the language
	phoneset, raw speech samples and extracted f0 tracks.
	"""

	@property
	def settings(self):
		"""Training settings loaded lazily from training.cfg (JSON)."""
		if not hasattr(self,"_settings"):
			with open("training.cfg","r") as f:
				self._settings=json.load(f)
		return self._settings

	@property
	def version(self):
		"""HTS demo version parsed from the $ver variable in scripts/Config.pm."""
		if not hasattr(self,"_version"):
			with open(os.path.join("scripts","Config.pm"),"r") as f:
				contents=f.read()
				self._version=int(version_pattern.search(contents).group(1))
		return self._version

	@property
	def version_name(self):
		return ("ver"+str(self.version))

	# Spectral analysis settings per supported sample rate:
	# (FRAMESHIFT, FRAMELEN, FFTLEN, FREQWARP, MGCORDER)
	_ANALYSIS_PARAMS={
		16000:(80,400,512,0.42,24),
		22050:(110,550,1024,0.45,30),
		24000:(120,600,1024,0.466,30),
		32000:(160,800,1024,0.5,34),
		44100:(220,1100,2048,0.54,34),
		48000:(240,1200,2048,0.55,34)}

	def get_analysis_params(self):
		"""Returns an ordered mapping of HTS analysis parameters for the
		configured sample rate.

		Raises ValueError for a sample rate without predefined settings
		(previously an incomplete dict was returned silently, causing a
		confusing KeyError later).
		"""
		sr=self.settings["sample_rate"]
		try:
			values=self._ANALYSIS_PARAMS[sr]
		except KeyError:
			raise ValueError("Unsupported sample rate: {}".format(sr))
		params=collections.OrderedDict(zip(["FRAMESHIFT","FRAMELEN","FFTLEN","FREQWARP","MGCORDER"],values))
		params["BAPORDER"]=len(self.get_filter_band_edges())
		return params

	def get_phoneset(self):
		"""Parses phonemes.xml of the configured language and returns a dict
		mapping phoneme name to its feature attributes."""
		phoneset=dict()
		doc=xml.parse(os.path.join(langdir,self.settings["language"],"phonemes.xml"))
		for elem in doc.iterfind("phoneme"):
			name=elem.get("name")
			features={key:value for key,value in elem.items() if key!="name"}
			phoneset[name]=features
		return phoneset

	def load_speech(self,name):
		"""Loads data/raw/<name>.raw (int32 samples) scaled to [-1, 1)."""
		samples=numpy.fromfile(os.path.join("data","raw",name+".raw"),numpy.int32).astype(float)
		samples/=(2**31)
		return samples

	def load_f0(self,name):
		"""Loads data/lf0/<name>.lf0 and converts it to linear f0 values;
		unvoiced frames (stored as -1e10) become 0."""
		lf0=numpy.fromfile(os.path.join("data","lf0",name+".lf0"),numpy.float32).astype(float)
		f0=numpy.zeros(lf0.size)
		m=(lf0!=-10000000000)
		f0[m]=numpy.exp(lf0[m])
		return f0

	def get_filter_band_edges(self):
		"""Returns the band edge frequencies (Hz) for the aperiodicity
		filterbank: 1 kHz, then every 2 kHz up to min(16 kHz, Nyquist-2 kHz)."""
		sr=self.settings["sample_rate"]
		nyq_freq=sr//2
		max_freq=min(16000,nyq_freq-2000)
		edges=[1000]
		edges.extend(range(2000,max_freq+1,2000))
		return edges

class configurator(task):
	"""Implements the "configure" subcommand: builds the argument list for
	the HTS demo's ./configure script from the training settings and runs
	it after user confirmation."""

	def get_configure_params(self):
		"""Returns the ordered name->value mapping passed to ./configure."""
		params=collections.OrderedDict()
		params["--with-fest-search-path"]=os.path.join(self.settings["festdir"],"examples")
		for name in ["--with-sptk-search-path","--with-hts-search-path","--with-hts-engine-search-path"]:
			params[name]=self.settings["bindir"]
		params["SPEAKER"]=self.settings["speaker"]
		params["DATASET"]=dataset
		params["SAMPFREQ"]=self.settings["sample_rate"]
		params.update(self.get_analysis_params())
		params["USEGV"]=1 if self.settings.get("use_gv",False) else 0
		params["USEMSPF"]=0
		params["NITER"]=10
		return params

	def register(self):
		subparser=subparsers.add_parser("configure")
		subparser.set_defaults(func=self)

	def __call__(self,args):
		params=self.get_configure_params()
		command=["./configure"]
		print("HTS configuration script will be called with the following arguments:")
		for key,value in params.items():
			command.append("{}={}".format(key,value))
			print(command[-1])
		answer=input("Continue? ").strip()
		# An empty answer or anything starting with y/Y counts as consent.
		if (not answer) or answer[0].lower()=="y":
			subprocess.check_call(command)

class recordings_importer(task):
	"""Implements the "import-recordings" subcommand: converts the source
	wave files with a Praat script to the target sample rate under
	data/wav and data/raw, and records the file-name mapping and the
	detected trim times as JSON."""

	def register(self):
		subparser=subparsers.add_parser("import-recordings")
		subparser.set_defaults(func=self)

	def get_file_list_from_ssml(self):
		"""Builds the (path, id, output name) list from xml:id attributes of
		the sentences in the ssml text, or returns None when ids are not
		used so that the caller falls back to file-name matching."""
		ssml_path=self.settings["text"]
		if not os.path.exists(ssml_path):
			return None
		print("Checking ids in ssml")
		wavedir=self.settings["wavedir"]
		doc=xml.parse(ssml_path)
		res=[]
		i=0
		for e in doc.iterfind("s"):
			sent_id=e.get("{http://www.w3.org/XML/1998/namespace}id", None)
			if not sent_id:
				# Ids are all-or-nothing: only the very first sentence may
				# lack one (then ids are simply not in use).
				if res:
					print("No xml:id for sentence:", e.text)
					sys.exit()
				return None
			wav_path=os.path.abspath(os.path.join(wavedir, sent_id+".wav"))
			if not os.path.exists(wav_path):
				print(wav_path, "not found")
				sys.exit()
			i+=1
			out_name="{}_{}_{:04}".format(dataset, self.settings["speaker"], i)
			print(sent_id, "-> ", out_name)
			res.append((wav_path, sent_id, out_name))
		return res

	def get_file_list(self):
		"""Returns the list of recordings to import, either from ssml ids or
		by matching numbered wave file names in wavedir."""
		files=self.get_file_list_from_ssml()
		if files:
			return files
		regex=re.compile(r"([^0-9]*)([0-9]+)[^0-9]?.*\.wav")
		wavedir=self.settings["wavedir"]
		files=[]
		for inname in sorted(os.listdir(wavedir)):
			m=regex.match(inname)
			if not m:
				continue
			# Sort key: (non-numeric prefix, numeric part as int).
			n=int(m.group(2))
			p=m.group(1)
			key=(p,n)
			inpath=os.path.abspath(os.path.join(wavedir,inname))
			files.append([inpath,inname,key])
		files.sort(key=lambda x: x[-1])
		for i,item in enumerate(files):
			item[-1]="{}_{}_{:04}".format(dataset,self.settings["speaker"],i+1)
		return files

	def process(self,inpath,inname,outname):
		"""Runs the Praat import script on one recording and returns the
		(start, end) trim times it prints."""
		cmd=[self.settings.get("praat_path","praat"),"--run",
			 os.path.join(scriptdir,"import.praat"),
			 inpath,
			 os.path.join(workdir,"data","wav",outname+".wav"),
			 os.path.join(workdir,"data","raw",outname+".raw"),
			 str(self.settings["sample_rate"]),
			 "1" if self.settings.get("invert",False) else "0",
		str(self.settings.get("silence_threshold",-40))]
		out=subprocess.check_output(cmd)
		return tuple(map(float, out.strip().split()))

	def __call__(self,args):
		# makedirs also creates the parent "data" directory if missing.
		for subdir in ["raw","wav"]:
			os.makedirs(os.path.join(workdir,"data",subdir),exist_ok=True)
		files=self.get_file_list()
		mapping=collections.OrderedDict()
		times=collections.OrderedDict()
		pbar = tqdm(total=len(files), desc="Processing")
		for inpath,inname,outname in files:
			inbase=os.path.splitext(inname)[0]
			pbar.set_description("Processing {}".format(inname))
			t1,t2=self.process(inpath,inname,outname)
			mapping[outname]=inbase
			times[inbase]=[t1,t2]
			pbar.update(1)
		pbar.close()
		with open("file_name_mapping.json","w") as f:
			json.dump(mapping,f,indent=0)
		with open("trim.json","w") as f:
			json.dump(times,f,indent=0)

class htk_segmenter(task):
	"""Implements the "segment" subcommand: forced alignment with HTK.

	Trains monophone HMMs from a flat start (optionally initializing the
	silence model with webrtcvad), iteratively re-estimates and realigns
	the corpus, merges suspiciously short pause+stop sequences, and
	writes the final phone-level labels to data/labels/mono.
	"""

	def register(self):
		subparser=subparsers.add_parser("segment")
		subparser.set_defaults(func=self)

	def make_dirs(self):
		"""Creates the working directories used by segmentation."""
		dirs=["htk",os.path.join("data","labels"),os.path.join("data","labels","mono")]
		for dir in dirs:
			if not os.path.isdir(dir):
				os.mkdir(dir)

	def setup(self):
		"""Reads settings and prepares paths, the phoneset and, optionally,
		the webrtcvad-based voice activity detector."""
		self.num_states=self.settings.get("seg_num_states",5)
		assert(self.num_states in [3,5])
		self.alppf_regex=re.compile("^.*average log prob per frame = (.+)$")
		self.make_dirs()
		self.workdir="htk"
		self.phoneset=self.get_phoneset()
		# Phone classes used later when merging a pause with a following stop.
		self.plosives=set(phone for phone,features in self.phoneset.items() if features.get("ctype")=="s")
		self.affricates=set(phone for phone,features in self.phoneset.items() if features.get("ctype")=="a")
		self.stops=self.plosives|self.affricates
		self.voiced_consonants=set(phone for phone,features in self.phoneset.items() if features.get("cvox")=="+")
		self.wavdir=os.path.join("data","wav")
		self.recordings=[os.path.splitext(name)[0] for name in sorted(os.listdir(self.wavdir))]
		# HTK feature extraction configuration: MFCC_0_D_A, 5 ms shift
		# (TARGETRATE/WINDOWSIZE are in 100 ns units).
		self.param_settings={
			"TARGETKIND" : "MFCC_0_D_A", 
			"TARGETRATE" : 50000, 
			"SAVECOMPRESSED" : "T", 
			"SAVEWITHCRC" : "T", 
			"WINDOWSIZE" : 100000, 
			"USEHAMMING" : "T", 
			"PREEMCOEF" : 0.97, 
			"NUMCHANS" : 26, 
			"CEPLIFTER" : 22, 
			"NUMCEPS" : 12, 
			"ENORMALISE" : "T"}
		self.coding_settings=dict(self.param_settings)
		self.coding_settings["SOURCEFORMAT"]="WAV"
		self.coding_conf_path=os.path.join(self.workdir,"coding.conf")
		self.coding_scp_path=os.path.join(self.workdir,"coding.scp")
		self.mfccdir=os.path.join(self.workdir,"mfcc")
		self.mfcc_scp_path=os.path.join(self.workdir,"mfcc.scp")
		self.param_conf_path=os.path.join(self.workdir,"param.conf")
		self.hmmdir=os.path.join(self.workdir,"hmm")
		self.proto_path=os.path.join(self.workdir,"proto")
		self.proto1_path=os.path.join(self.workdir,"proto1")
		self.sil_mmf_path=os.path.join(self.workdir, "sil.mmf")
		with open("file_name_mapping.json") as f:
			self.file_name_mapping=json.load(f)
		self.init_sil_flag=self.settings.get("seg_init_sil", False)
		if self.init_sil_flag:
			from webrtcvad import Vad
			self.vad_frame_size=self.settings.get("seg_vad_frame_size", 30)
			assert(self.vad_frame_size in [10, 20, 30])
			# Convert ms to samples at 16 kHz.
			self.vad_frame_size*=16
			mode=self.settings.get("seg_vad_mode", 0)
			assert(mode in [0, 1, 2, 3])
			self.vad=Vad(mode)
			self.vad_mlf_path=os.path.join(self.workdir, "vad.mlf")

	def transcribe(self):
		"""Transcribes the training text with RHVoice and builds per-recording
		word sequences plus a lexicon allowing optional pauses at word
		boundaries marked by ssil/pau in the transcription."""
		self.transcriptions=[]
		self.lex={"SILENCE":[["pau"]]}
		phoneset=set()
		trans_path=os.path.join(self.workdir,"transcription")
		subprocess.check_call([os.path.join(RHVoice_bindir,"RHVoice-transcribe-sentences"),"-b","ssil",self.settings["text"],trans_path])
		i=-1
		with open(trans_path,"r") as f:
			for line in f:
				phones=line.split()
				if not phones:
					continue
				words=[]
				i+=1
				name=self.recordings[i]
				# Skip the leading and trailing silence markers; interior
				# ssil/pau annotate the preceding word.
				for ph in phones[1:-1]:
					if ph in ["ssil","pau"]:
						if ph=="pau":
							words[-1]=words[-1]+"_before_punctuation"
						elif ph=="ssil":
							words[-1]="word_final_"+words[-1]
					else:
						phoneset.add(ph)
						words.append(ph)
				self.transcriptions.append([name,words])
		self.phonelist=["pau"]
		for ph in phoneset:
			self.phonelist.append(ph)
			self.lex[ph]=[[ph]]
			self.lex["word_final_"+ph]=[[ph],[ph,"pau"]]
			self.lex[ph+"_before_punctuation"]=[[ph,"pau"],[ph]]
		self.phonelist.sort()

	def output_dict(self):
		"""Writes the pronunciation dictionary in HTK format."""
		dict_path=os.path.join(self.workdir,"dict")
		with open(dict_path,"wt") as f:
			for word in sorted(self.lex.keys()):
				prons=self.lex[word]
				for pron in prons:
					f.write(word)
					f.write(" ")
					f.write(" ".join(pron))
					f.write("\n")

	def output_words_mlf(self):
		"""Writes the word-level master label file."""
		path=os.path.join(self.workdir,"words.mlf")
		with open(path,"wt") as f:
			f.write("#!MLF!#\n")
			for name,words in self.transcriptions:
				f.write('"*/{}.lab"\n'.format(name))
				for word in words:
					f.write(word)
					f.write("\n")
				f.write(".\n")

	def hrun(self,tool,*args,**kw):
		"""Runs an HTK tool with tracing enabled, capturing stdout to
		htk/log/<tool>[_<tag>]."""
		logdir=os.path.join(self.workdir,"log")
		if not os.path.isdir(logdir):
			os.mkdir(logdir)
		cmd=[os.path.join(self.settings["htk_bindir"],tool)]
		cmd.append("-D")
		cmd.append("-A")
		cmd.extend(["-T","3"])
		cmd.extend(args)
		if "tag" in kw:
			fname=tool+"_"+kw["tag"]
		else:
			fname=tool
		with open(os.path.join(logdir,fname),"w") as f:
			subprocess.check_call(cmd,stdout=f)

	def expand_words_mlf(self):
		"""Expands the word mlf into a phone mlf with HLEd."""
		words_mlf_path=os.path.join(self.workdir,"words.mlf")
		dict_path=os.path.join(self.workdir,"dict")
		phones_mlf_path=os.path.join(self.workdir,"phones1.mlf")
		self.hrun("HLEd","-l","*","-d",dict_path,"-i",phones_mlf_path,os.path.join(scriptdir,"w2p.led"),words_mlf_path,tag="expand")

	def output_phonelist(self):
		"""Writes the sorted list of monophones."""
		path=os.path.join(self.workdir,"phonelist")
		phlist=list(self.phonelist)
		phlist.sort()
		self.output_lines(phlist,path)

	def output_config(self,params,path):
		"""Writes an HTK configuration file from a mapping."""
		with open(path,"w") as f:
			for k,v in sorted(params.items()):
				f.write("{} = {}\n".format(k,v))

	def output_lines(self,values,path):
		"""Writes values to path, one per line."""
		with open(path,"w") as f:
			for v in values:
				f.write(v)
				f.write("\n")

	def read_lines(self,path):
		"""Returns the non-empty, stripped lines of a text file."""
		lines=[]
		with open(path,"r") as f:
			for line in f:
				text=line.strip()
				if text:
					lines.append(text)
		return lines

	def get_hmm_dir(self,i):
		"""Returns (creating if needed) the directory for hmm iteration i."""
		if not os.path.isdir(self.hmmdir):
			os.mkdir(self.hmmdir)
		dir=os.path.join(self.hmmdir,str(i))
		if not os.path.isdir(dir):
			os.mkdir(dir)
		return dir

	def get_last_hmm_number(self):
		"""Returns the highest existing hmm iteration number."""
		return max([int(name) for name in os.listdir(self.hmmdir)])

	def code(self):
		"""Extracts MFCC features from the wave files with HCopy."""
		if not os.path.isdir(self.mfccdir):
			os.mkdir(self.mfccdir)
		# Fixed: the scp line must contain each recording's name, not a
		# constant placeholder, or HCopy codes the same file repeatedly.
		self.output_lines(["{wavdir}/{filename}.wav {mfccdir}/{filename}.mfcc".format(wavdir=self.wavdir,mfccdir=self.mfccdir,filename=filename) for filename in self.recordings],self.coding_scp_path)
		self.output_config(self.param_settings,self.param_conf_path)
		self.output_config(self.coding_settings,self.coding_conf_path)
		print("Coding")
		self.hrun("HCopy","-C",self.coding_conf_path,"-S",self.coding_scp_path,"-F","WAV")
		print("Done")
		self.output_lines(["{}/{}.mfcc".format(self.mfccdir,filename) for filename in self.recordings],self.mfcc_scp_path)

	def make_proto_trans_matrix(self,ns):
		"""Builds the (ns+2)x(ns+2) left-to-right transition matrix for a
		prototype HMM with ns emitting states."""
		n=ns+2
		m=numpy.zeros((n,n))
		m[0,1]=1
		for i in range(1,n-2):
			m[i,i]=0.6
			m[i,i+1]=0.4
		# The last emitting state dwells a bit longer.
		m[-2,-2]=0.7
		m[-2,-1]=0.3
		return m

	def build_proto(self,n,ns):
		"""Returns the lines of a prototype HMM definition with n-dimensional
		observations and ns emitting states (zero means, unit variances)."""
		proto=[]
		proto.append("~o <VECSIZE> {} <{}>".format(n,self.param_settings["TARGETKIND"]))
		proto.append('~h "proto"')
		proto.append("<BeginHMM>")
		proto.append("<NumStates> {}".format(ns+2))
		for i in range(2,ns+2):
			proto.append("<State> {}".format(i))
			proto.append("<Mean> {}".format(n))
			proto.append(" ".join(["0.0"]*n))
			proto.append("<Variance> {}".format(n))
			proto.append(" ".join(["1.0"]*n))
		proto.append("<TransP> {}".format(ns+2))
		m=self.make_proto_trans_matrix(ns)
		for i in range(m.shape[0]):
			proto.append(" ".join(map(str,m[i])))
		proto.append("<EndHMM>")
		return proto

	def patch_initial_hmm(self, hmm, name):
		"""If silence initialization is enabled, copies the mean and variance
		of the VAD-trained silence model into the initial pau model."""
		if name!="pau":
			return hmm
		if not self.init_sil_flag:
			return hmm
		sil=self.read_lines(self.sil_mmf_path)
		pau=list(hmm)
		# NOTE(review): relies on HTK writing <MEAN>/<VARIANCE> in uppercase
		# in sil.mmf — confirm against the HTK version in use.
		for t in ["<MEAN>", "<VARIANCE>"]:
			v=[sil[i+1] for i in range(len(sil)) if sil[i].startswith(t)][0]
			for i, line in enumerate(hmm):
				if hmm[i].startswith(t):
					pau[i+1]=v
		return pau

	def output_hmmdefs(self):
		"""Writes the flat-start hmmdefs: one copy of the prototype (HCompV
		output) per monophone."""
		proto=self.read_lines(os.path.join(self.get_hmm_dir(0),"proto"))
		lines=[]
		for p in self.phonelist:
			lines.append('~h "{}"'.format(p))
			lines.extend(self.patch_initial_hmm(proto, p)[4:])
		self.output_lines(lines,os.path.join(self.get_hmm_dir(0),"hmmdefs"))

	def output_macros(self):
		"""Writes the macros file: global options plus variance floors."""
		dir=self.get_hmm_dir(0)
		lines=self.read_lines(os.path.join(dir,"proto"))[:3]
		lines.extend(self.read_lines(os.path.join(dir,"vFloors")))
		self.output_lines(lines,os.path.join(dir,"macros"))

	def get_alppf(self,i):
		"""Returns the average log probability per frame reported by the
		HERest run numbered i."""
		lines=self.read_lines(os.path.join(self.workdir,"log","HERest_{}".format(i)))
		for line in reversed(lines):
			m=self.alppf_regex.match(line)
			if m:
				return float(m.group(1))
		raise RuntimeError("Average log prob per frame not found")

	def reest(self,i,j):
		"""Runs one HERest re-estimation from hmm i to hmm i+1 using the
		phones<j> label set."""
		phlist=os.path.join(self.workdir,"phonelist")
		phmlf=os.path.join(self.workdir,"phones{}.mlf".format(j))
		indir=self.get_hmm_dir(i)
		outdir=self.get_hmm_dir(i+1)
		self.hrun("HERest","-C",self.param_conf_path,"-I",phmlf,"-t","250.0","150.0","1000.0","-S",self.mfcc_scp_path,"-H",os.path.join(indir,"macros"),"-H",os.path.join(indir,"hmmdefs"),"-M",outdir,phlist,tag=str(i))

	def reest_until(self,n,j,min_epsilon,max_iter):
		"""Re-estimates starting from hmm n until the per-frame log prob gain
		drops to min_epsilon, and returns the number of the best model."""
		initial=True
		for i in range(n,n+max_iter):
			print("Reestimating hmm {}".format(i))
			self.reest(i,j)
			alppf=self.get_alppf(i)
			print("Average log prob per frame = {}".format(alppf))
			if initial:
				initial=False
			else:
				epsilon=alppf-prev_alppf
				print("Epsilon = {}".format(epsilon))
				if epsilon<=min_epsilon:
					if epsilon<=0:
						# The last iteration made things worse: discard it.
						shutil.rmtree(self.get_hmm_dir(i+1))
						return i
					else:
						return (i+1)
			prev_alppf=alppf
		return (n+max_iter)

	def edit_hmm(self,i,script_name):
		"""Applies an HHEd script to hmm i, writing the result to hmm i+1.
		Scripts without a directory component are looked up next to this
		script."""
		if os.path.split(script_name)[0]:
			script=script_name
		else:
			script=os.path.join(scriptdir,script_name)
		phlist=os.path.join(self.workdir,"phonelist")
		indir=self.get_hmm_dir(i)
		outdir=self.get_hmm_dir(i+1)
		# Tag the log with the script name so repeated HHEd runs do not
		# overwrite each other's logs.
		self.hrun("HHEd","-H",os.path.join(indir,"macros"),"-H",os.path.join(indir,"hmmdefs"),"-M",outdir,script,phlist,tag=os.path.splitext(os.path.basename(script))[0])

	def inc_mix(self,n,m):
		"""Increases the number of Gaussian mixture components to m."""
		print("Incrementing number of mixture components to {}".format(m))
		script_path=os.path.join(self.workdir,"mix{}.hed".format(m))
		# NOTE(review): the state range 2-6 matches num_states==5; with
		# num_states==3 only states 2-4 exist — confirm HHEd tolerates this.
		content=["MU {} {{*.state[2-6].mix}}".format(m)]
		self.output_lines(content,script_path)
		self.edit_hmm(n,script_path)

	def inc_mix_and_reest_until(self,n,j,min_epsilon1,max_iter):
		"""Increases the mixture count step by step, re-estimating after each
		step, and keeps the last model that improved the log prob."""
		steps=self.settings.get("seg_inc_mix_steps",[2,3])
		if not steps:
			return n
		prev_n=n
		prev_alppf=self.get_alppf(n-1)
		assert(steps[0]==2)
		for m in steps:
			self.inc_mix(prev_n,m)
			next_n=self.reest_until(prev_n+1,j,min_epsilon1,max_iter)
			next_alppf=self.get_alppf(next_n-1)
			epsilon2=next_alppf-prev_alppf
			print("epsilon2={}".format(epsilon2))
			if epsilon2<=0:
				return prev_n
			prev_n=next_n
			prev_alppf=next_alppf
		return next_n

	def align(self,i,j):
		"""Force-aligns the corpus with hmm i using HVite, producing the
		phones<j> label set, and reports problematic files."""
		moddir=self.get_hmm_dir(i)
		inmlf=os.path.join(self.workdir,"words.mlf")
		outmlf1=os.path.join(self.workdir,"aligned{}.mlf".format(j))
		outmlf2=os.path.join(self.workdir,"phones{}.mlf".format(j))
		phlist=os.path.join(self.workdir,"phonelist")
		dict_path=os.path.join(self.workdir,"dict")
		print("Aligning with hmm {}".format(i))
		self.hrun("HVite","-o","SW","-l","*","-b","SILENCE","-C",self.param_conf_path,"-a","-H",os.path.join(moddir,"macros"),"-H",os.path.join(moddir,"hmmdefs"),"-i",outmlf1,"-m","-t","250.0","150.0","1000.0","-y","lab","-I",inmlf,"-S",self.mfcc_scp_path,dict_path,phlist,tag="align{}".format(j))
		print("Done")
		self.hrun("HLEd","-l","*","-i",outmlf2,os.path.join(scriptdir,"pau.led"),outmlf1,tag="fixpau{}".format(j))
		self.find_problematic_alignments(j)

	def find_problematic_alignments(self,j):
		"""Counts HVite alignment failures per source recording and writes
		them, most frequent first, to problematic_files_<j>.txt."""
		counts=collections.Counter()
		aligning_regex=re.compile(u"^Aligning File: .+/([^/]+)\\.mfcc$")
		lines=self.read_lines(os.path.join(self.workdir,"log","HVite_align{}".format(j)))
		src_name=None
		for line in lines:
			m=aligning_regex.match(line)
			if m:
				name=m.group(1)
				src_name=self.file_name_mapping[name]
				continue
			if not line.startswith("No tokens survived to final node of network"):
				continue
			counts[src_name]+=1
		with codecs.open(os.path.join(self.workdir,"problematic_files_{}.txt".format(j)),"w","utf-8") as f:
			for name,count in counts.most_common():
				f.write(u"{}\t{}\n".format(name,count))

	def load_alignments(self,j):
		"""Parses phones<j>.mlf into self.alignments ([file, labels] pairs)
		and collects per-phone duration statistics."""
		path=os.path.join(self.workdir,"phones{}.mlf".format(j))
		lines=self.read_lines(path)[1:]
		self.alignments=[]
		self.durations=collections.defaultdict(list)
		for line in lines:
			if line==".":
				self.alignments.append([file_name,labels])
			elif line.startswith('"*/'):
				file_name=line[3:-1]
				labels=[]
			else:
				tokens=line.split()
				start=int(tokens[0])
				end=int(tokens[1])
				duration=end-start
				phone=tokens[2]
				labels.append([start,end,phone])
				self.durations[phone].append(duration)

	def fix_labels(self):
		"""Merges a pause followed by a stop into a single stop label when the
		combined span is shorter than the stop's 95th duration percentile
		(the aligner tends to mislabel stop closures as pauses)."""
		self.labels=[]
		pau_plus_stop_limits=dict()
		for phone in self.stops:
			pau_plus_stop_limits[phone]=stats.scoreatpercentile(self.durations[phone],95)
		for file_name,labs in self.alignments:
			new_labs=[list(labs[0]),list(labs[1])]
			for i in range(2,len(labs)-1):
				if new_labs[-1][2]=="pau" and labs[i][2] in self.stops and (labs[i][1]-new_labs[-1][0]) < pau_plus_stop_limits[labs[i][2]]:
					new_labs[-1][1]=labs[i][1]
					new_labs[-1][2]=labs[i][2]
				else:
					new_labs.append(list(labs[i]))
			new_labs.append(list(labs[-1]))
			self.labels.append([file_name,new_labs])

	def save_labels(self):
		"""Writes the fixed labels to data/labels/mono."""
		for file_name,labs in self.labels:
			path=os.path.join("data","labels","mono",file_name)
			with open(path,"w") as f:
				for start,end,phone in labs:
					f.write("{} {} {}\n".format(start,end,phone))

	def generate_vad_data_frames(self, samples):
		"""Yields (raw frame bytes, start time in 100 ns units) windows over
		samples, shifted by 5 ms at 16 kHz and centered on each shift."""
		shift=80
		win_size=self.vad_frame_size
		half_win_size=win_size//2
		self.vad_time_factor=10000000
		padded_samples=numpy.pad(samples, (half_win_size, 0))
		for i in range(0, padded_samples.size-win_size+1, shift):
			data=padded_samples[i:i+win_size].tobytes()
			time=int(i*self.vad_time_factor/16000)
			yield (data, time)

	def generate_vad_frames(self, samples):
		"""Yields (is_speech, time) for each VAD window."""
		for data, time in self.generate_vad_data_frames(samples):
			is_speech=self.vad.is_speech(data, 16000)
			yield (is_speech, time)

	def get_vad_start_frames(self, samples):
		"""Yields only the frames where the speech/silence decision changes."""
		pvf=(None, 0)
		for vf in self.generate_vad_frames(samples):
			if vf[0]!=pvf[0]:
				yield vf
			pvf=vf

	def vad_file(self, name):
		"""Runs VAD on one 16 kHz wave file and returns (label, start, end)
		segments in HTK time units."""
		sr, samples=wavfile.read(os.path.join(self.wavdir, name+".wav"))
		assert(sr==16000)
		assert(samples.dtype==numpy.int16)
		frames=list(self.get_vad_start_frames(samples))
		names=["speech" if f[0] else "sil" for f in frames]
		starts=[f[1] for f in frames]
		ends=[f[1] for f in frames[1:]]+[int(samples.size*self.vad_time_factor/sr)]
		return zip(names, starts, ends)

	def do_vad(self):
		"""Writes a speech/silence mlf for all recordings when silence
		initialization is enabled."""
		if not self.init_sil_flag:
			return
		print("Applying Voice Activity Detection")
		with open(self.vad_mlf_path, "wt") as f:
			f.write("#!MLF!#\n")
			for file_name in self.recordings:
				f.write('"*/{}.lab"\n'.format(file_name))
				for name, start, end in self.vad_file(file_name):
					f.write("{} {} {}\n".format(start, end, name))
				f.write(".\n")
		print("Done")

	def __call__(self,args):
		self.setup()
		self.do_vad()
		self.transcribe()
		self.output_dict()
		self.output_words_mlf()
		self.expand_words_mlf()
		self.output_phonelist()
		self.code()
		if self.init_sil_flag:
			# Train a dedicated single-state silence model on the VAD
			# segments and use it to seed the pau model.
			self.output_lines(self.build_proto(39,1), self.proto1_path)
			shutil.copy(self.proto1_path, self.sil_mmf_path)
			self.hrun("HInit", "-C", self.param_conf_path, "-S", self.mfcc_scp_path, "-l", "sil", "-I", self.vad_mlf_path, "-H", self.sil_mmf_path, "-o", "sil", "-M", self.workdir, "proto", tag="sil")
			self.hrun("HRest", "-C", self.param_conf_path, "-S", self.mfcc_scp_path, "-l", "sil", "-I", self.vad_mlf_path, "-H", self.sil_mmf_path, "-M", self.workdir, "sil", tag="sil")
		self.output_lines(self.build_proto(39,self.num_states),self.proto_path)
		self.hrun("HCompV","-C",self.param_conf_path,"-f","0.01","-m","-S",self.mfcc_scp_path,"-M",self.get_hmm_dir(0),self.proto_path)
		self.output_hmmdefs()
		self.output_macros()
		max_reest_iter=100
		# Flat start, then progressively tighter convergence thresholds with
		# realignment in between.
		n=self.reest_until(0,1,0.1,max_reest_iter)
		self.edit_hmm(n,"pau{}.hed".format(self.num_states))
		n+=1
		n=self.reest_until(n,1,0.01,max_reest_iter)
		self.align(n,2)
		n=self.reest_until(n,2,0.005,max_reest_iter)
		self.align(n,3)
		n=self.reest_until(n,3,0.001,max_reest_iter)
		n=self.inc_mix_and_reest_until(n,3,0.001,max_reest_iter)
		self.align(n,4)
		self.load_alignments(4)
		self.fix_labels()
		self.save_labels()

class labeller(task):
	"""Implements the "label" subcommand: generates full-context training
	labels and test labels with RHVoice-make-hts-labels and builds the
	derived mlf/list/scp files via the data makefile."""

	def register(self):
		subparser=subparsers.add_parser("label")
		subparser.set_defaults(func=self)

	def __call__(self,args):
		labdir=os.path.join("data","labels","full")
		testdir=os.path.join("data","labels","gen")
		for dir in [labdir,testdir]:
			if not os.path.isdir(dir):
				os.mkdir(dir)
		subprocess.check_call([os.path.join(RHVoice_bindir,"RHVoice-make-hts-labels"),"-l",os.path.join("data","labels","mono"),self.settings["text"],labdir])
		subprocess.check_call([os.path.join(RHVoice_bindir,"RHVoice-make-hts-labels"),"-p","test",self.settings["test"],testdir])
		os.chdir("data")
		try:
			# Restore the working directory even if a make target fails.
			for target in ["mlf","list","scp"]:
				subprocess.check_call(["make",target])
		finally:
			os.chdir("..")

class f0_extracter(task):
	"""Implements the "extract-f0" subcommand: extracts f0 tracks for all
	raw recordings with one of several backends (Praat, SPTK, REAPER,
	WORLD, penn, RMVPE) or a median vote over several of them, and saves
	log-f0 files under data/lf0 (-1e10 marks unvoiced frames)."""

	def register(self):
		subparser=subparsers.add_parser("extract-f0")
		subparser.add_argument("--cores",type=int,default=1,help="Number of CPU cores to use for parallel extraction.")
		subparser.set_defaults(func=self)

	def extract_with_praat(self,filepath,method,min_f0,max_f0):
		"""Extracts f0 with Praat's ac or cc pitch algorithm via a script."""
		assert(method in ["praat_ac","praat_cc"])
		wavfile.write("utt.wav",self.settings["sample_rate"],numpy.fromfile(filepath,numpy.int32))
		voicing_threshold=self.settings.get(method+"_voicing_threshold",0.45)
		command=[self.settings.get("praat_path","praat"),"--run"]
		command.append(os.path.join(scriptdir,"print-pitch.praat"))
		command.append(os.path.abspath("utt.wav"))
		command.append(str(min_f0))
		command.append(str(max_f0))
		command.append(str(voicing_threshold))
		command.append("1" if method.endswith("_ac") else "0")
		output=subprocess.check_output(command)
		os.remove("utt.wav")
		return list(map(float,output.split()))

	def extract_with_sptk(self,filepath,method,min_f0,max_f0):
		"""Extracts f0 with the SPTK pitch tool (RAPT, SWIPE' or REAPER)."""
		methods={"sptk_rapt":"0","sptk_swipe":"1","sptk_reaper":"2"}
		assert(method in methods.keys())
		voicing_threshold=self.settings.get(method+"_voicing_threshold",None)
		audio=((numpy.fromfile(filepath,numpy.int32).astype(float))/(2**16)).astype(numpy.float32)
		command=[os.path.join(self.settings["bindir"],"pitch")]
		command.extend(["-a",methods[method]])
		command.extend(["-s",str(float(self.settings["sample_rate"])/1000.0)])
		command.extend(["-p",str(self.get_analysis_params()["FRAMESHIFT"])])
		command.extend(["-L",str(min_f0)])
		command.extend(["-H",str(max_f0)])
		command.extend(["-o","1"])
		if voicing_threshold is not None:
			command.extend(["-t"+methods[method],str(voicing_threshold)])
		process=subprocess.Popen(command,stdin=subprocess.PIPE,stdout=subprocess.PIPE)
		output=process.communicate(audio.tobytes())[0]
		return list(numpy.frombuffer(output, dtype=numpy.float32))

	def extract_with_reaper(self,raw_path,min_f0,max_f0):
		"""Extracts f0 with the standalone REAPER binary via a temporary
		16-bit wave file; negative (unvoiced) values become 0."""
		f0_path="utt.f0"
		wav_path="utt.wav"
		extra_opts=[]
		if "reaper_options" in self.settings:
			extra_opts=self.settings["reaper_options"].split()
		# Convert int32 raw samples to clipped int16 for REAPER.
		audio=numpy.fromfile(raw_path,numpy.int32).astype(float)/(2**31)
		m=2**15
		audio=numpy.round(audio*m)
		audio[audio<-m]=-m
		audio[audio>(m-1)]=m-1
		audio=audio.astype(numpy.int16)
		wavfile.write(wav_path,self.settings["sample_rate"],audio)
		subprocess.check_call([self.settings["reaper_path"],"-a","-m",str(min_f0),"-x",str(max_f0),"-i",wav_path,"-f",f0_path]+extra_opts)
		os.remove(wav_path)
		f0s=[]
		with open(f0_path,"rt") as f_in:
			for line in f_in:
				if line.strip()=="EST_Header_End":
					break
			for line in f_in:
				f0=float(line.split()[-1])
				if f0<0:
					f0=0
				f0s.append(f0)
		os.remove(f0_path)
		return f0s

	def extract_with_penn(self,raw_path,min_f0,max_f0):
		"""Extracts f0 with the penn neural pitch estimator (CPU)."""
		samples=numpy.fromfile(raw_path,numpy.int32).astype(float)
		samples/=(2**31)
		audio = self.torch.tensor(numpy.copy(samples))[None].float()
		f0, periodicity = self.penn.from_audio(
			audio,
			self.settings["sample_rate"],
			hopsize=0.005,
			fmin=min_f0,
			fmax=max_f0,
			# Select a checkpoint to use for inference. The default checkpoint will
			# download and use FCNF0++ pretrained on MDB-stem-synth and PTDB
			checkpoint=None,
			# If you are using a gpu, pick a batch size that doesn't cause memory errors
			# on your gpu
			batch_size=2048,
			# Centers frames at hopsize / 2, 3 * hopsize / 2, 5 * hopsize / 2, ...
			center = 'zero',
			# (Optional) Linearly interpolate unvoiced regions below periodicity threshold
			interp_unvoiced_at=0.65,
			gpu=None)
		return f0[0].numpy().tolist()

	def extract_with_world(self,raw_path,method,min_f0,max_f0):
		"""Extracts f0 with WORLD's DIO or Harvest algorithm."""
		assert(method in ["world_dio","world_harvest"])
		samples=numpy.fromfile(raw_path,numpy.int32).astype(float)
		samples/=(2**31)
		if method.endswith("_dio"):
			f0s, times = pyworld.dio(samples, self.settings["sample_rate"], min_f0, max_f0)
		else:
			f0s, times = pyworld.harvest(samples, self.settings["sample_rate"], min_f0, max_f0)
		return f0s.tolist()

	def extract_with_rmvpe(self,raw_path, min_f0, max_f0):
		"""Extracts f0 with the RMVPE model; values outside the configured
		range are treated as unvoiced."""
		speech=numpy.fromfile(raw_path,numpy.int32).astype(float)
		speech/=(2**31)
		f0s = self.rmvpe_model.infer_from_audio(speech, self.settings["sample_rate"], device="cpu", thred=0.009, use_viterbi=False)
		f0s[(f0s < min_f0) | (f0s > max_f0)] = 0
		return f0s.tolist()

	def or_values(self, values):
		"""Returns the first non-zero value, or the last value if all are
		falsy (Python's `or` chained over the sequence)."""
		r=values[0]
		for v in values[1:]:
			r=r or v
		return r

	def extract_and_or(self, file_path, methods, min_f0, max_f0):
		"""Combines several '|'-separated methods frame-wise: each frame gets
		the first method's non-zero value."""
		values=[self.extract_with(file_path, method, min_f0, max_f0) for method in methods.split("|")]
		return [self.or_values(x) for x in zip(*values)]

	def extract_with(self,raw_path,method,min_f0,max_f0):
		"""Dispatches to the extraction backend selected by method, importing
		optional heavy dependencies lazily on first use."""
		if "|" in method:
			return self.extract_and_or(raw_path, method, min_f0, max_f0)
		elif method.startswith("praat_"):
			return self.extract_with_praat(raw_path,method,min_f0,max_f0)
		elif method=="reaper":
			return self.extract_with_reaper(raw_path,min_f0,max_f0)
		elif method=="penn":
			if not hasattr(self, "torch"):
				import torch
				self.torch = torch
			if not hasattr(self, "penn"):
				import penn
				self.penn = penn
			return self.extract_with_penn(raw_path,min_f0,max_f0)
		elif method.startswith("world_"):
			return self.extract_with_world(raw_path,method,min_f0,max_f0)
		elif method=="rmvpe":
			if not hasattr(self, "rmvpe_model"):
				if not os.path.exists(self.settings["rmvpe_dir"]):
					print("RMVPE dir is not exists.\n", file=sys.stderr)
					sys.exit(1)
				sys.path.append(os.path.abspath(self.settings["rmvpe_dir"]))
				if not os.path.exists(self.settings["rmvpe_model_path"]):
					print("RMVPE model path is not exists.\n", file=sys.stderr)
					sys.exit(1)
				from src import RMVPE
				self.rmvpe_model = RMVPE(self.settings["rmvpe_model_path"], hop_length=80)
			return self.extract_with_rmvpe(raw_path, min_f0, max_f0)
		else:
			return self.extract_with_sptk(raw_path,method,min_f0,max_f0)

	def extract_and_vote(self,raw_path,methods,min_f0,max_f0):
		"""Runs several methods and takes, per frame, the median of the
		non-zero values when at least half the methods report voicing."""
		extracted=list()
		for method in sorted(set(methods)):
			extracted.append(self.extract_with(raw_path,method,min_f0,max_f0))
		result=list()
		for values in zip(*extracted):
			nzvalues=[v for v in values if v!=0]
			if len(nzvalues)>=(len(values)-len(nzvalues)):
				result.append(numpy.median(nzvalues))
			else:
				result.append(0)
		return result

	def extract(self,raw_path,min_f0,max_f0):
		"""Extracts f0 with the configured method (a string) or a voting
		ensemble (a list of method names)."""
		method=self.settings["f0_method"]
		if isinstance(method,str):
			return self.extract_with(raw_path,method,min_f0,max_f0)
		else:
			return self.extract_and_vote(raw_path,method,min_f0,max_f0)

	def process(self,name):
		"""Extracts the f0 track for one raw file name."""
		raw_dir=os.path.join("data","raw")
		raw_path=os.path.join(raw_dir,name)
		min_f0=self.settings["lower_f0"]
		max_f0=self.settings["upper_f0"]
		return self.extract(raw_path,min_f0,max_f0)

	def get_number_of_frames(self,name):
		"""Returns the expected number of analysis frames for a raw file."""
		with open(os.path.join("data","raw",name),"rb") as fp:
			samples=numpy.fromfile(fp,numpy.int32)
		return int(numpy.ceil(samples.size/float(self.shift)))

	def _worker_f0(self, name):
		"""Extracts, pads and saves the log-f0 track for one file; files with
		no voiced frames are skipped with a warning."""
		f0_dir=os.path.join("data","lf0")
		base=os.path.splitext(name)[0]
		values=self.process(name)
		nzvalues=[v for v in values if v!=0]
		if not nzvalues:
			print(f"\nWarning: No voiced frames found for {name}. Skipping file.", file=sys.stderr)
			return
		# Pad with unvoiced frames up to the expected frame count.
		nframes=self.get_number_of_frames(name)
		if nframes>len(values):
			values.extend((nframes-len(values))*[0])
		with open(os.path.join(f0_dir,base+".lf0"),"wb") as f:
			for f0 in values:
				if f0==0:
					lf0=-10000000000.0
				else:
					lf0=math.log(f0)
				f.write(struct.pack("=f",lf0))

	def __call__(self,args):
		self.shift=self.get_analysis_params()["FRAMESHIFT"]
		raw_dir=os.path.join("data","raw")
		f0_dir=os.path.join("data","lf0")
		if not os.path.isdir(f0_dir):
			os.mkdir(f0_dir)
		filelist = os.listdir(raw_dir)
		# Resume support: only process files without an existing .lf0.
		tasks = [name for name in filelist if not os.path.exists(os.path.join(f0_dir, os.path.splitext(name)[0] + ".lf0"))]
		if not tasks:
			print("Extracting f0: All files are already processed.")
			return
		print(f"Extracting f0 using {args.cores} core(s).")
		if args.cores > 1:
			with multiprocessing.Pool(processes=args.cores) as pool:
				list(tqdm(pool.imap_unordered(self._worker_f0, tasks), total=len(tasks), desc="processing"))
		else:
			for name in tqdm(tasks, desc="Processing"):
				self._worker_f0(name)

class f0_range_computer(f0_extracter):
	"""Estimates the speaker's f0 range and stores it in training.cfg."""

	def register(self):
		subparser=subparsers.add_parser("guess-f0-range")
		subparser.set_defaults(func=self)

	def __call__(self,args):
		raw_dir=os.path.join("data","raw")
		filelist=os.listdir(raw_dir)
		method=self.settings.get("guess_f0_method")
		# fall back to sptk_rapt unless a non-empty method name is configured
		if not (isinstance(method,str) and method):
			method="sptk_rapt"
		log_f0s=[]
		progress=tqdm(total=len(filelist), desc="Processing")
		for name in sorted(filelist):
			progress.set_description("Processing {}".format(name))
			frames=self.extract_with(os.path.join(raw_dir,name),method,40,700)
			voiced=[v for v in frames if v!=0]
			log_f0s.extend(numpy.log(voiced))
			progress.update(1)
		progress.close()
		log_f0s=numpy.array(log_f0s)
		mean=numpy.mean(log_f0s)
		# three standard deviations around the mean log-f0
		spread=3.0*numpy.std(log_f0s)
		min_f0=int(numpy.round(numpy.exp(mean-spread)))
		max_f0=int(numpy.round(numpy.exp(mean+spread)))
		with open("training.cfg") as f:
			settings=json.load(f,object_pairs_hook=collections.OrderedDict)
		settings["lower_f0"]=min_f0
		settings["upper_f0"]=max_f0
		# write to a temporary file first so training.cfg is replaced atomically
		with open("tmp.cfg","w") as f:
			json.dump(settings,f,indent=0)
		os.rename("tmp.cfg","training.cfg")
		print("Saved in training.cfg: {} - {} HZ".format(min_f0,max_f0))

class synthesizer(task):
	"""Resynthesizes the corpus from the extracted mgc and lf0 parameters."""

	def register(self):
		subparser=subparsers.add_parser("synth")
		subparser.set_defaults(func=self)

	def read_bytes(self,f,n):
		# Yield successive n-byte chunks until the stream is exhausted.
		while True:
			chunk=f.read(n)
			if not chunk:
				return
			yield chunk

	def create_pitch_file(self,name):
		"""Convert an utterance's log-f0 file into a per-frame pitch-period file."""
		outpath="utt.pitch"
		sample_rate=float(self.settings["sample_rate"])
		inpath=os.path.join("data","lf0",name+".lf0")
		with open(inpath,"rb") as f_in, open(outpath,"wb") as f_out:
			for chunk in self.read_bytes(f_in,4):
				lf0=struct.unpack("=f",chunk)[0]
				# unvoiced frames (sentinel lf0) get a zero period
				if lf0!=-10000000000:
					period=sample_rate/numpy.exp(lf0)
				else:
					period=0
				f_out.write(struct.pack("=f",period))
		return outpath

	def process(self,name):
		"""Synthesize one utterance via the excite | mlsadf | x2x | sox pipeline."""
		params=self.get_analysis_params()
		bindir=self.settings["bindir"]
		mgc_path=os.path.join("data","mgc",name+".mgc")
		wav_path=os.path.join("data","synth",name+".wav")
		pitch_path=self.create_pitch_file(name)
		frame_shift=str(params["FRAMESHIFT"])
		pipeline=[os.path.join(bindir,"excite"),"-p",frame_shift,pitch_path,"|",
			 os.path.join(bindir,"mlsadf"),"-m",str(params["MGCORDER"]),"-a",str(params["FREQWARP"]),"-p",frame_shift,mgc_path,"|",
			 os.path.join(bindir,"x2x"),"-o","+fs","|",
			 "sox","-t","raw","-e","signed","-b","16","-r",str(self.settings["sample_rate"]),"-",wav_path]
		# the pipeline is joined into one shell command so "|" works
		subprocess.check_call(" ".join(pipeline),shell=True)
		os.remove(pitch_path)

	def __call__(self,args):
		synth_dir=os.path.join("data","synth")
		if not os.path.isdir(synth_dir):
			os.mkdir(synth_dir)
		names=sorted(os.listdir(os.path.join("data","raw")))
		progress=tqdm(total=len(names), desc="Processing")
		for name in names:
			base=os.path.splitext(name)[0]
			progress.set_description("Processing {}".format(base))
			self.process(base)
			progress.update(1)
		progress.close()

class phonetic_feature_table(object):
	"""Maps phonemes to phonetic features organised in ordered tiers.

	Each tier (e.g. voicing, place) maps a feature value to the set of
	phonemes carrying it.  Tier indices are 1-based.
	"""

	def __init__(self):
		self._tier_names=[]
		self._tiers={}
		self._phonemes=set()

	def tag(self,phoneme,tier_name,feature):
		"""Record that phoneme has the given feature value on the given tier."""
		self._phonemes.add(phoneme)
		tier=self._tiers.get(tier_name)
		if tier is None:
			# preserve insertion order of both tiers and feature values
			tier=collections.OrderedDict()
			self._tiers[tier_name]=tier
			self._tier_names.append(tier_name)
		tier.setdefault(feature,set()).add(phoneme)

	def number_of_tiers(self):
		return len(self._tier_names)

	def tier_name(self,index):
		# tier indices are 1-based
		return self._tier_names[index-1]

	def tier_features(self,index):
		"""Return the tier's feature values, preceded by None (the wildcard)."""
		return [None]+list(self._tiers[self.tier_name(index)])

	def phonemes_with_feature(self,tier_index,feature):
		"""Return the phonemes carrying the feature; None matches every phoneme."""
		if feature is None:
			return self._phonemes
		return self._tiers[self.tier_name(tier_index)][feature]

class phonetic_class(object):
	"""A node in a tree of phoneme classes built from a feature table.

	The root covers every phoneme; each level below restricts the set by one
	tier's feature value (None meaning "any value on this tier").
	"""

	def __init__(self,feature_table=None,parent=None,feature=None):
		if feature_table is not None:
			# root node
			self.feature_table=feature_table
			self.parent=None
			self.feature=None
			self.level=0
		else:
			# child node refining its parent by one tier
			self.feature_table=parent.feature_table
			self.feature=feature
			self.level=parent.level+1
			# a weak proxy avoids parent<->child reference cycles
			self.parent=weakref.proxy(parent)
		self.children=[]

	def phonemes(self):
		"""Return the set of phonemes belonging to this class."""
		if self.parent is None:
			return self.feature_table.phonemes_with_feature(1,None)
		inherited=self.parent.phonemes()
		own=self.feature_table.phonemes_with_feature(self.level,self.feature)
		return inherited.intersection(own)

	def name(self):
		"""Build the class name from feature tests joined with &&."""
		if self.parent is None:
			return ""
		parent_name=self.parent.name()
		if self.feature is None:
			return parent_name
		own="{}=={}".format(self.feature_table.tier_name(self.level),self.feature)
		return "{}&&{}".format(parent_name,own) if parent_name else own

	def is_null(self):
		# A class is null when no feature restricts it anywhere up the chain.
		if self.feature is not None:
			return False
		return self.parent is None or self.parent.is_null()

	def expand(self):
		"""Create child classes for each feature value of the next tier."""
		next_level=self.level+1
		for feature in self.feature_table.tier_features(next_level):
			child=phonetic_class(parent=self,feature=feature)
			# only keep classes that still describe more than one phoneme
			if len(child.phonemes())>1:
				self.children.append(child)

class hts_feature(object):
	"""Describes one positional feature inside an HTS full-context label.

	Builds both a glob-style format string (for writing QS questions) and a
	regex (for pulling the value back out of a label line).
	"""

	def __init__(self,name,prefix,suffix,value_pattern):
		self.name=name
		self.prefix=prefix
		self.suffix=suffix
		if prefix:
			format_string="*"+prefix+"{}"
			pattern=".+"+re.escape(prefix)+"({})".format(value_pattern)
		else:
			# a feature without a prefix is anchored at the label start
			format_string="{}"
			pattern="^({})".format(value_pattern)
		if suffix:
			format_string+=suffix+"*"
			pattern+=re.escape(suffix)+".+"
		else:
			pattern+="$"
		self.format_string=format_string
		self.regex=re.compile(pattern)

	def extract_value(self,label):
		"""Return this feature's value from a full-context label (bytes)."""
		return self.regex.match(label.decode('utf-8')).group(1)

class questions_maker(task):
	"""Generates the HTS decision-tree question files from the phoneset and labels."""

	def register(self):
		subparser=subparsers.add_parser("make-questions")
		subparser.set_defaults(func=self)

	def build_phonetic_feature_table(self):
		"""Collect every phoneme/feature pair from the phoneset into a table."""
		feature_table=phonetic_feature_table()
		for phoneme,features in self.get_phoneset().items():
			for name,value in features.items():
				feature_table.tag(phoneme,name,value)
		return feature_table

	def generate_phonetic_questions(self):
		"""Expand phonetic classes breadth-first into named questions.

		Returns an OrderedDict mapping a sorted phoneme tuple to the question
		name, widest classes first; only the first (shallowest) name for each
		distinct phoneme set is kept.
		"""
		feature_table=self.build_phonetic_feature_table()
		number_of_tiers=feature_table.number_of_tiers()
		questions=dict()
		root=phonetic_class(feature_table=feature_table)
		unvisited=collections.deque()
		unvisited.append(root)
		while unvisited:
			node=unvisited.popleft()
			if node.level==number_of_tiers:
				if not node.is_null():
					phonemes=tuple(sorted(node.phonemes()))
					if phonemes not in questions:
						questions[phonemes]=node.name()
			else:
				node.expand()
				unvisited.extend(node.children)
		return collections.OrderedDict(sorted(questions.items(),key=lambda p: len(p[0]),reverse=True))

	def load_hts_features(self):
		"""Parse labelling.xml into an ordered name -> hts_feature mapping.

		Each feature's prefix is the preceding feature's suffix, mirroring how
		the fields are concatenated inside a full-context label.
		"""
		features=collections.OrderedDict()
		doc=xml.parse(os.path.join(langdir,self.settings["language"],"labelling.xml"))
		prefix=""
		for elem in doc.find("format").iterfind("feature"):
			name=elem.get("name")
			value_pattern=elem.get("value_pattern","[a-zA-Z0-9]+")
			# assumes every <feature> element is followed by separator text
			suffix=elem.tail.strip()
			features[name]=hts_feature(name,prefix,suffix,value_pattern)
			prefix=suffix
		return features

	def extract_values(self,hts_features):
		"""Scan every full-context label file and collect, per feature, the set
		of values seen, converting numeric values to int."""
		label_dir=os.path.join("data","labels","full")
		values=collections.defaultdict(set)
		for fname in os.listdir(label_dir):
			if fname.endswith(".lab"):
				with open(os.path.join(label_dir,fname),"rb") as f:
					for line in f:
						label=line.split()[-1]
						for name,feature in hts_features.items():
							value=feature.extract_value(label)
							try:
								value=int(value)
							except ValueError:
								pass
							values[name].add(value)
		return values

	def write_feature_questions(self,f_out,feature,values,phonetic_questions=None):
		"""Write the QS lines for one feature.

		Phoneme-valued features first get phonetic-class questions; every
		feature then gets exact-match questions; purely numeric features
		additionally get cumulative "<=" questions.
		"""
		m=re.match("^(?:(?:prev_)*|(?:next_)*)(?:name|(?:syl_vowel(?:_in_word)?))$",feature.name)
		if m and phonetic_questions is not None:
			for phonemes,question in phonetic_questions.items():
				if set(phonemes).intersection(set(values)):
					# vowel-valued features only take classes made of vowels
					if "vowel" in feature.name and set(phonemes).difference(self.vowels):
						continue
					if feature.name.endswith("name"):
						prefix=feature.name[:-4]
					else:
						prefix=feature.name+"_"
					f_out.write('QS "{}{}"\t{{{}}}\n'.format(prefix,question,",".join(feature.format_string.format(phoneme) for phoneme in phonemes)))
			f_out.write("\n")
		string_values=[str(v) for v in values]
		for value in sorted(string_values):
			f_out.write('QS "{}=={}"\t{{{}}}\n'.format(feature.name,value,feature.format_string.format(value)))
		f_out.write("\n")
		if not m:
			# Cumulative questions must be computed from the ORIGINAL values:
			# the previous implementation tested isinstance(v,int) on the
			# stringified values, so the "<=" questions were never written.
			int_values=sorted(v for v in values if isinstance(v,int))
			non_int_values={v for v in values if not isinstance(v,int)}
			# only emit them for numeric features ("x" marks a missing value)
			if int_values and non_int_values<={"x"} and int_values[-1]>1:
				min_val=min(1,int_values[0])
				for v in int_values:
					lesser_values=["x"] if "x" in values else list()
					lesser_values.extend(range(min_val,v+1))
					f_out.write('QS "{}<={}"\t{{{}}}\n'.format(feature.name,v,",".join(feature.format_string.format(i) for i in lesser_values)))
				f_out.write("\n")

	def __call__(self,args):
		"""Write questions_qst001.hed and questions_utt_qst001.hed."""
		qstdir=os.path.join("data","questions")
		if not os.path.isdir(qstdir):
			os.mkdir(qstdir)
		self.phoneset=self.get_phoneset()
		# vowels are the phonemes carrying the "vc"=="+" feature
		self.vowels=set(phone for phone,features in self.phoneset.items() if features.get("vc")=="+")
		phonetic_questions=self.generate_phonetic_questions()
		hts_features=self.load_hts_features()
		all_values=self.extract_values(hts_features)
		with open(os.path.join(qstdir,"questions_qst001.hed"),"w") as f_out:
			for name,feature in hts_features.items():
				self.write_feature_questions(f_out,feature,all_values[name],phonetic_questions)
		# utterance-level features get their own question file
		with open(os.path.join(qstdir,"questions_utt_qst001.hed"),"w") as f_out:
			for name in ["num_syls_in_utt","num_words_in_utt","num_phrases_in_utt"]:
				self.write_feature_questions(f_out,hts_features[name],all_values[name])

class realigner(task):
	"""Realigns the corpus with the trained models and prepares the next version."""

	def register(self):
		subparser=subparsers.add_parser("realign")
		subparser.set_defaults(func=self)

	def copy_models(self):
		# Decompress the re-clustered models into the working model directory.
		for kind in ["cmp","dur"]:
			inpath=os.path.join("models","qst001",self.version_name,kind,"re_clustered.mmf.embedded.gz")
			outpath=os.path.join(self.moddir,kind+".mmf.gz")
			subprocess.check_call(["cp",inpath,outpath])
			subprocess.check_call(["gunzip",outpath])

	def align(self):
		"""Run HSMMAlign to produce realigned full-context label files."""
		lst=os.path.join("data","lists","full.list")
		cmd=[os.path.join(self.settings["bindir"],"HSMMAlign"),
			 "-A","-D","-T","1",
			 "-C",os.path.join("configs","qst001",self.version_name,"trn.cnf"),
			 "-S",os.path.join("data","scp","train.scp"),
			 "-I",os.path.join("data","labels","full.mlf"),
			 "-t","1500","100","5000",
			 "-w","1.0",
			 "-H",os.path.join(self.moddir,"cmp.mmf"),
			 "-N",os.path.join(self.moddir,"dur.mmf"),
			 "-m",self.fulldir,
			 lst,lst]
		subprocess.check_call(cmd)

	def save_mono(self):
		"""Derive monophone label files from the realigned full-context ones."""
		# the current phone sits between "-" and "+" in a full-context label
		name_regex=re.compile(r"^[a-z0-9]+\^[a-z0-9]+\-([a-z0-9]+)\+.+$")
		for filename in sorted(os.listdir(self.fulldir)):
			inpath=os.path.join(self.fulldir,filename)
			outpath=os.path.join(self.monodir,filename)
			with open(inpath,"r") as f_in, open(outpath,"w") as f_out:
				for line in f_in:
					start,end,lab=line.split()
					name=name_regex.match(lab).group(1)
					f_out.write("{} {} {}\n".format(start,end,name))

	def cleanup(self):
		# The decompressed models are only needed during alignment.
		os.remove(os.path.join(self.moddir,"cmp.mmf"))
		os.remove(os.path.join(self.moddir,"dur.mmf"))
		os.rmdir(self.moddir)

	def update_mlf(self):
		"""Point the master label files at the realigned labels."""
		speaker=self.settings["speaker"]
		label_dir=os.path.abspath(self.workdir)
		for kind in ["mono","full"]:
			with open(os.path.join("data","labels",kind+".mlf"),"w") as fp:
				fp.write("#!MLF!#\n")
				fp.write('"*/{dataset}_{speaker}_*.lab" -> "{dir}/{type}"\n'.format(dataset=dataset,speaker=speaker,dir=label_dir,type=kind))

	def copy_lpf(self):
		"""Copy the low-pass-filter stream files into the next voice version."""
		base_dir=os.path.join("voices","qst001")
		next_version_name="ver"+str(self.version+1)
		indir=os.path.join(base_dir,self.version_name)
		outdir=os.path.join(base_dir,next_version_name)
		os.mkdir(outdir)
		for filename in ["lpf.pdf","tree-lpf.inf","lpf.win1"]:
			shutil.copy(os.path.join(indir,filename),os.path.join(outdir,filename))

	def update_version_number(self):
		"""Bump $ver in scripts/Config.pm to the next version."""
		path=os.path.join("scripts","Config.pm")
		with open(path,"r") as ifp:
			old_contents=ifp.read()
		version_line="$ver = '{}';".format(self.version+1)
		with open(path,"w") as ofp:
			ofp.write(version_pattern.sub(version_line,old_contents))

	def __call__(self,args):
		self.workdir=os.path.join("realigned",self.version_name)
		self.moddir=os.path.join(self.workdir,"mod")
		self.monodir=os.path.join(self.workdir,"mono")
		self.fulldir=os.path.join(self.workdir,"full")
		for path in (self.workdir,self.moddir,self.monodir,self.fulldir):
			if not os.path.isdir(path):
				os.makedirs(path)
		self.copy_models()
		self.align()
		self.save_mono()
		self.cleanup()
		self.update_mlf()
		self.update_version_number()

class initializer(task):
	"""Sets up a fresh training working directory from the bundled templates."""

	def register(self):
		subparser=subparsers.add_parser("init")
		subparser.set_defaults(func=self)

	def __call__(self,args):
		"""Copy the default config and HTS demo files into the (empty) workdir.

		Raises RuntimeError if the working directory already contains files:
		the previous assert-based check would have been silently stripped
		under ``python -O`` and existing files could have been clobbered.
		"""
		if os.listdir(workdir):
			raise RuntimeError("The working directory must be empty")
		with open(os.path.join(scriptdir,"training.cfg")) as ifp:
			settings=json.load(ifp,object_pairs_hook=collections.OrderedDict)
		settings["outdir"]=voxdir
		with open(os.path.join(workdir,"training.cfg"),"w") as ofp:
			json.dump(settings,ofp,indent=0)
		htsdir=os.path.abspath(os.path.join(scriptdir,"..","hts"))
		for name in sorted(os.listdir(htsdir)):
			inpath=os.path.join(htsdir,name)
			outpath=os.path.join(workdir,name)
			if os.path.isdir(inpath):
				shutil.copytree(inpath,outpath)
			else:
				# copy2 preserves file metadata
				shutil.copy2(inpath,outpath)

class lpf_maker(task):
	"""Creates the low-pass-filter stream (pdf, tree, window) for voice version 1."""

	def register(self):
		subparser=subparsers.add_parser("make-lpf")
		subparser.set_defaults(func=self)

	def make_filters(self):
		"""Write lpf.pdf: FIR low-pass filters with cutoffs at 1..6 kHz.

		For each of the 5 states the file holds every filter's taps followed
		by a zero variance vector.
		"""
		num_states=5
		num_filters=6
		num_taps=31
		counts=num_filters*numpy.ones(num_states,dtype=numpy.int32)
		variances=numpy.zeros(num_taps,dtype=numpy.float32)
		filters=numpy.zeros((num_filters,num_taps),dtype=numpy.float32)
		for j in range(num_filters):
			cutoff=(j+1)*1000
			try:
				# older scipy releases spell the Hann window "hanning"
				filters[j]=firwin(num_taps,cutoff,window="hanning",fs=self.settings["sample_rate"])
			except ValueError:
				# the alias was removed in newer scipy; a bare except here
				# previously masked every other failure as well
				filters[j]=firwin(num_taps,cutoff,window="hann",fs=self.settings["sample_rate"])
		with open(os.path.join(self.outdir,"lpf.pdf"),"wb") as fp:
			counts.tofile(fp)
			for i in range(num_states):
				for j in range(num_filters):
					filters[j].tofile(fp)
					variances.tofile(fp)

	def make_tree(self):
		"""Write tree-lpf.inf: a hand-built decision tree selecting stronger
		filtering around sibilants in the outer HMM states."""
		phoneset=self.get_phoneset()
		# sibilants carry the "csib" feature in the phoneset
		sibs=sorted(ph for ph,feats in phoneset.items() if feats.get("csib",None)=="+")
		with open(os.path.join(self.outdir,"tree-lpf.inf"),"wt") as fp:
			fp.write("QS sibilant {{ {} }}\n".format(",".join('"*-{}+*"'.format(ph) for ph in sibs)))
			fp.write("QS prev_sibilant {{ {} }}\n".format(",".join('"*^{}-*"'.format(ph) for ph in sibs)))
			fp.write("QS next_sibilant {{ {} }}\n".format(",".join('"*+{}=*"'.format(ph) for ph in sibs)))
			fp.write('QS prev_name==pau { "*^pau-*" }\n')
			fp.write('QS next_name==pau { "*+pau=*" }\n')
			fp.write("\n")
			# states 2..6; the first and last state also look at the neighbour
			for i in range(2,7):
				fp.write("{{*}}[{}]\n".format(i))
				fp.write("{\n")
				if i==2:
					fp.write('0 sibilant "lpf_s2_6" -1\n')
					fp.write('-1 prev_name==pau -2 "lpf_s2_2"\n')
					fp.write('-2 prev_sibilant "lpf_s2_6" "lpf_s2_2"\n')
				elif i==6:
					fp.write('0 sibilant "lpf_s6_6" -1\n')
					fp.write('-1 next_name==pau -2 "lpf_s6_2"\n')
					fp.write('-2 next_sibilant "lpf_s6_6" "lpf_s6_2"\n')
				else:
					fp.write('0 sibilant "lpf_s{i}_6" "lpf_s{i}_2"\n'.format(i=i))
				fp.write("}\n")
				fp.write("\n")

	def make_window(self):
		# single-tap identity window for the lpf stream
		with open(os.path.join(self.outdir,"lpf.win1"),"wt") as fp:
			fp.write("1 1.0\n")

	def __call__(self,args):
		self.outdir=os.path.join("voices","qst001","ver1")
		if not os.path.exists(self.outdir):
			os.makedirs(self.outdir)
		self.make_filters()
		self.make_tree()
		self.make_window()

class voice_exporter(task):
	"""Packages the trained models into an installable RHVoice voice."""

	def register(self):
		subparser=subparsers.add_parser("export-voice")
		subparser.add_argument("--version",type=int,default=0)
		subparser.set_defaults(func=self)

	def load_config(self,path):
		"""Read a key=value config file into an OrderedDict.

		Lines without "=" are skipped — the previous implementation raised
		IndexError on blank or malformed lines ("if not toks" could never be
		true because str.split always returns at least one token).  Values
		may themselves contain "=" characters.
		"""
		conf=collections.OrderedDict()
		if not os.path.isfile(path):
			return conf
		with open(path,"r") as f:
			for line in f:
				key,sep,value=line.strip().partition("=")
				if not sep:
					continue
				conf[key]=value
		return conf

	def save_config(self,conf,path):
		"""Write an OrderedDict as key=value lines."""
		with open(path,"w") as f:
			for key,val in conf.items():
				f.write("{}={}\n".format(key,val))

	def save_info(self):
		"""Create or update voice.info, preserving existing fields."""
		path=os.path.join(self.outdir,"voice.info")
		info=self.load_config(path)
		name=self.settings["speaker"].title()
		if "name" in info:
			# explicit check instead of assert, which vanishes under -O
			if info["name"]!=name:
				raise RuntimeError("Speaker name mismatch in voice.info")
		else:
			info["name"]=name
		if "language" not in info:
			info["language"]=self.settings["language"].title()
		if "gender" not in info:
			info["gender"]=self.settings["gender"].lower()
		info["format"]=self.voice_format
		info["revision"]="0"
		self.save_config(info,path)

	def get_key(self):
		"""Return the speaker's median f0 (Hz) over all extracted lf0 files."""
		print("Calculating the speaker's median f0")
		f0s=[]
		for f in pathlib.Path("data/lf0").iterdir():
			utt_lf0s=numpy.fromfile(f, dtype=numpy.float32)
			# unvoiced frames hold a large negative sentinel; keep voiced only
			utt_f0s=numpy.exp([v for v in utt_lf0s if v>0])
			f0s.extend(utt_f0s)
		key=int(numpy.round(numpy.median(f0s)))
		print("Median f0 = ", key)
		return key

	def save_params(self):
		"""Write voice.params once; an existing file is left untouched."""
		path=os.path.join(self.outdir,"voice.params")
		if os.path.isfile(path):
			return
		params=collections.OrderedDict([("beta","0.4"),("gain","1.0")])
		params["key"]=str(self.get_key())
		self.save_config(params,path)

	def copy_voice(self):
		"""Copy the compiled .htsvoice file into the output directory."""
		inpath=os.path.join(self.indir,dataset+"_"+self.settings["speaker"]+".htsvoice")
		outpath=os.path.join(self.modoutdir,"voice.data")
		shutil.copy(inpath,outpath)

	def make_conversion_config(self):
		"""Write the HHEd config forcing natural read / swapped write order."""
		path=os.path.join(self.modoutdir,"convert.conf")
		conf=collections.OrderedDict()
		conf["NATURALREADORDER"]="T"
		conf["NATURALWRITEORDER"]="F"
		self.save_config(conf,path)
		return path

	def make_conversion_script(self,set,type):
		"""Write a temporary HHEd script converting one stream's tree and models."""
		script_path=os.path.abspath(os.path.join(self.modoutdir,type+".hed"))
		tree_path=os.path.abspath(os.path.join("trees","qst001",self.version_name,set,type+".inf.untied"))
		with open(script_path,"w") as f:
			f.write("LT {}\n".format(tree_path))
			f.write("CT {}\n".format(self.modoutdir))
			f.write("CM {}\n".format(self.modoutdir))
		return script_path

	def convert_model_and_tree(self,set,type,n,conf):
		"""Run HHEd to emit tree-<type>.inf and <type>.pdf for one stream."""
		hhed=os.path.join(self.settings["hts22_bindir"],"HHEd")
		mmf=os.path.abspath(os.path.join("models","qst001",self.version_name,set,"re_clustered.mmf"))
		script=self.make_conversion_script(set,type)
		lst=os.path.abspath(os.path.join("data","lists","full.list"))
		cmd=[hhed,"-B","-C",conf,"-T","2","-p","-i","-H",mmf,script,lst]
		subprocess.check_call(cmd)
		os.remove(script)
		# HHEd numbers its outputs by stream index; rename to the stream name
		os.rename(os.path.join(self.modoutdir,"trees.{}".format(n)),os.path.join(self.modoutdir,"tree-{}.inf".format(type)))
		os.rename(os.path.join(self.modoutdir,"pdf.{}".format(n)),os.path.join(self.modoutdir,"{}.pdf".format(type)))

	def copy_windows(self,type,n):
		"""Copy the n delta windows of one stream into the voice directory."""
		for i in range(1,n+1):
			file_name="{}.win{}".format(type,i)
			shutil.copy2(os.path.join("data","win",file_name),os.path.join(self.modoutdir,file_name))

	def copy_lpf(self):
		"""Convert lpf.pdf from little-endian to the big-endian engine layout.

		Each filter's taps are paired with zero "variances" and the stored
		variance vectors are skipped.
		"""
		shutil.copy(os.path.join(self.indir,"tree-lpf.inf"),os.path.join(self.modoutdir,"tree-lpf.inf"))
		shutil.copy(os.path.join(self.indir,"lpf.win1"),os.path.join(self.modoutdir,"lpf.win1"))
		numtaps=31
		numstates=5
		with open(os.path.join(self.indir,"lpf.pdf"),"rb") as ifp:
			with open(os.path.join(self.modoutdir,"lpf.pdf"),"wb") as ofp:
				# header: two flags plus the vector length, big-endian
				ofp.write(struct.pack(">i",0))
				ofp.write(struct.pack(">i",1))
				ofp.write(struct.pack(">i",numtaps))
				# per-state pdf counts
				for i in range(numstates):
					n=struct.unpack("<i",ifp.read(4))[0]
					ofp.write(struct.pack(">i",n))
				while True:
					for i in range(numtaps):
						data=ifp.read(4)
						if not data:
							return
						c=struct.unpack("<f",data)[0]
						ofp.write(struct.pack(">ff",c,0))
					# skip the stored variance vector
					ifp.read(4*numtaps)

	def copy_bpf(self):
		shutil.copy(os.path.join("data","bpf.txt"),os.path.join(self.modoutdir,"bpf.txt"))

	def convert_data(self):
		"""Convert every stream's models and copy the auxiliary data files."""
		conf_path=self.make_conversion_config()
		self.convert_model_and_tree("cmp","mgc",1,conf_path)
		self.convert_model_and_tree("cmp","lf0",2,conf_path)
		self.convert_model_and_tree("cmp","bap",5,conf_path)
		self.convert_model_and_tree("dur","dur",1,conf_path)
		os.remove(conf_path)
		self.copy_windows("mgc",3)
		self.copy_windows("lf0",3)
		self.copy_windows("bap",3)
		self.copy_bpf()

	def __call__(self,args):
		# --version overrides the version picked up from the training state
		if args.version>0:
			self._version=args.version
		self.voice_format="4"
		self.indir=os.path.abspath(os.path.join("voices","qst001",self.version_name))
		self.outdir=os.path.abspath(os.path.join(self.settings["outdir"],self.settings["speaker"].lower()))
		self.modoutdir=os.path.join(self.outdir,str(self.settings["sample_rate"]))
		if not os.path.isdir(self.modoutdir):
			os.makedirs(self.modoutdir)
		self.save_info()
		self.save_params()
		self.copy_voice()
		self.convert_data()

class nccf(object):
	"""Normalized cross-correlation of one analysis frame of a signal.

	Measures how periodic the signal is at lags around a given f0 candidate;
	per-lag correlation values are computed lazily and cached.
	"""

	def __init__(self,speech,sample_rate,frame_shift,frame_number,f0,avg_f0):
		# f0 candidate for this frame and its period, in samples
		self.f0=f0
		self.t0=float(sample_rate)/f0
		self.it0=int(numpy.round(self.t0))
		# the correlation window is sized from the average period so every
		# frame of an utterance uses the same window length
		avg_t0=float(sample_rate)/avg_f0
		self.sample_rate=sample_rate
		self.speech=speech
		half_win_len=int(numpy.round(0.5*avg_t0))
		self.win_len=2*half_win_len+1
		# window is centered on the frame, clamped at the signal start
		self.sample_index=max(0,frame_number*frame_shift-half_win_len)
		# search lags spanning 800 Hz (shortest) down to 20 Hz (longest)
		self.min_lag=int(numpy.round(self.sample_rate/800.0))
		self.max_lag=int(numpy.round(self.sample_rate/20.0))
		self.size=self.max_lag+2
		# lazily-filled per-lag cache: mask marks which entries are valid
		self.values=numpy.empty(self.size)
		self.mask=numpy.zeros(self.size,dtype=bool)
		self.x=speech[self.sample_index:self.sample_index+self.win_len]
		self.sum_xx=numpy.sum(self.x*self.x)

	def eval(self,lag):
		"""Compute the normalized cross-correlation at the given lag.

		Returns 0 when the reference window has no energy or the lagged
		window would run past the end of the signal.
		"""
		if self.sum_xx==0:
			return 0
		sample_index=self.sample_index+lag
		limit=sample_index+self.x.size
		if limit>self.speech.size:
			return 0
		y=self.speech[sample_index:limit]
		sum_yy=numpy.sum(y*y)
		if sum_yy==0:
			# avoid division by zero for an all-zero lagged window
			sum_yy=1
		sum_xy=numpy.sum(self.x*y)
		return sum_xy/numpy.sqrt(self.sum_xx*sum_yy)

	def get(self,lag):
		"""Cached access to eval(lag)."""
		if not self.mask[lag]:
			self.values[lag]=self.eval(lag)
			self.mask[lag]=True
		return self.values[lag]

	def is_peak_lag(self,lag):
		"""Return True if lag is a positive local maximum inside the search range.

		Requires a strictly higher value than the previous lag and at least
		as high as the next one.
		"""
		if lag<self.min_lag:
			return False
		if lag>self.max_lag:
			return False
		c2=self.get(lag)
		if c2<=0:
			return False
		c1=self.get(lag-1)
		if c2<=c1:
			return False
		c3=self.get(lag+1)
		return (c3<=c2)

	def interpolate_peak(self,lag):
		"""Refine a peak by parabolic interpolation through three points.

		Returns an (f0, correlation) pair for the interpolated maximum.
		"""
		x1=lag-1
		x2=lag
		x3=lag+1
		y1=self.get(x1)
		y2=self.get(x2)
		y3=self.get(x3)
		dy=0.5*(y3-y1)
		d2y=2.0*y2-y1-y3
		y=y2+0.5*dy*dy/d2y
		x=x2+dy/d2y
		return (float(self.sample_rate)/x,y)

	def find_nearest_peak(self):
		"""Find the correlation peak whose f0 is nearest the candidate.

		Searches outward from the candidate period in both directions; the
		first peak found on one side tightens the remaining search range on
		the other (only a closer peak could still win).  Falls back to the
		candidate period itself when no peak exists.
		"""
		min_lag=self.min_lag
		max_lag=self.max_lag
		# lag1 walks down (higher f0), lag2 walks up (lower f0)
		lag1=int(numpy.floor(self.t0))
		lag2=lag1+1
		p1=None
		p2=None
		while (lag1>=min_lag and (not p1)) or (lag2<=max_lag and (not p2)):
			if lag1>=min_lag and (not p1):
				if self.is_peak_lag(lag1):
					p1=self.interpolate_peak(lag1)
					if p2:
						# both sides found: return whichever f0 is closer
						if numpy.abs(self.f0-p1[0])<=numpy.abs(self.f0-p2[0]):
							return p1
						else:
							return p2
					# shrink the upward search to f0 mirrored below the peak
					d=p1[0]-self.f0
					f=self.f0-d
					max_lag=int(numpy.ceil(self.sample_rate/f))
				lag1-=1
			if lag2<=max_lag and (not p2):
				if self.is_peak_lag(lag2):
					p2=self.interpolate_peak(lag2)
					if p1:
						if numpy.abs(self.f0-p2[0])<=numpy.abs(self.f0-p1[0]):
							return p2
						else:
							return p1
					# shrink the downward search symmetrically
					d=self.f0-p2[0]
					f=self.f0+d
					min_lag=int(numpy.floor(self.sample_rate/f))
				lag2+=1
		# at most one side can still hold a peak here
		assert((not p1) or (not p2))
		if p1:
			return p1
		if p2:
			return p2
		# no peak anywhere: report the candidate itself
		return (self.f0,self.get(self.it0))

class bap_extractor(task):
	"""Extracts band aperiodicity (BAP) features from the raw recordings."""

	def register(self):
		subparser=subparsers.add_parser("extract-bap")
		# argparse's type=bool turns ANY non-empty string (even "False") into
		# True; parse the value explicitly so "--skip False" really disables
		# skipping while "--skip True" keeps working.
		subparser.add_argument("--skip",type=lambda v: v.strip().lower() in ("1","true","t","yes","y"),default=False,help="Skip already extracted data.")
		subparser.add_argument("--cores", type=int, default=1, help="Number of CPU cores to use for parallel extraction.")
		subparser.set_defaults(func=self)

	def make_filters(self):
		"""Design the FIR band-splitting filter bank and save it to data/bpf.txt."""
		freqs=self.get_filter_band_edges()
		num_bands=len(freqs)+1
		# 1 kHz transition width (normalized), 60 dB attenuation
		tw=1000.0/self.nyq
		n,b=kaiserord(60,tw)
		if n%2==0:
			# keep an odd tap count so the group delay is an integer
			n+=1
		self.filters=numpy.zeros((num_bands,n))
		# a float window argument is interpreted as the Kaiser beta
		self.filters[0]=firwin(n,freqs[0],fs=self.settings["sample_rate"],window=b,scale=False)
		self.filters[-1]=firwin(n,freqs[-1],fs=self.settings["sample_rate"],window=b,pass_zero=False,scale=False)
		for i in range(1,num_bands-1):
			self.filters[i]=firwin(n,(freqs[i-1],freqs[i]),fs=self.settings["sample_rate"],window=b,pass_zero=False,scale=False)
		with open(os.path.join("data","bpf.txt"),"w") as fp:
			numpy.array(self.filters.shape).tofile(fp,sep="\n")
			fp.write("\n")
			self.filters.tofile(fp,sep="\n")
			fp.write("\n")

	def split_into_bands(self,speech):
		"""Filter the signal through each band filter, compensating group delay."""
		num_bands=self.filters.shape[0]
		bands=numpy.zeros((num_bands,speech.size))
		num_taps=self.filters.shape[1]
		delay=(num_taps-1)//2
		# pad so that dropping the delay keeps the original length
		padded_speech=numpy.append(speech,numpy.zeros(delay))
		for i in range(num_bands):
			bands[i]=lfilter(self.filters[i],1,padded_speech)[delay:]
		return bands

	def compute_correlation_coefficients(self,speech,bands,f0):
		"""Per-frame, per-band correlation at the pitch peak nearest each f0.

		Unvoiced frames (f0 == 0) keep zero coefficients.
		"""
		num_bands=bands.shape[0]
		cc=numpy.zeros((f0.size,num_bands))
		# utterance-level median f0 fixes the correlation window length
		af0=numpy.median(f0[f0!=0])
		for i in range(f0.size):
			if f0[i]==0:
				continue
			cf0=f0[i]
			for j in range(num_bands):
				f=nccf(bands[j],self.sample_rate,self.frame_shift,i,cf0,af0)
				f0_j,cc[i,j]=f.find_nearest_peak()
		return cc

	def process(self,name):
		"""Compute and save the BAP track for one utterance."""
		speech=self.load_speech(name)
		f0=self.load_f0(name)
		bands=self.split_into_bands(speech)
		cc=self.compute_correlation_coefficients(speech,bands,f0)
		# clamp to [0, 1) so the log below stays finite
		cc[cc<0]=0
		cc[cc>=(1-1e-10)]=1-1e-10
		bap=10.0*numpy.log10(1-cc)
		bap.astype(numpy.float32).tofile(os.path.join(self.bap_dir,name+".bap"))

	def __call__(self,args):
		"""Extract BAP for every pending raw file, optionally in parallel."""
		self.sample_rate=self.settings["sample_rate"]
		self.nyq=self.sample_rate/2.0
		self.frame_shift=self.get_analysis_params()["FRAMESHIFT"]
		self.make_filters()
		self.bap_dir=os.path.join("data","bap")
		if not os.path.isdir(self.bap_dir):
			os.mkdir(self.bap_dir)
		filelist = os.listdir(os.path.join("data","raw"))
		tasks = []
		for name in filelist:
			base,ext=os.path.splitext(name)
			# with --skip, leave files whose .bap output already exists alone
			if not (args.skip and os.path.exists(os.path.join(self.bap_dir,f"{base}.bap"))):
				tasks.append(base)
		if not tasks:
			print("Extracting BAP: all files are already processed.")
			return
		print(f"Extracting BAP using {args.cores} core(s).")
		if args.cores > 1:
			with multiprocessing.Pool(processes=args.cores) as pool:
				list(tqdm(pool.imap_unordered(self.process, tasks), total=len(tasks), desc="Processing"))
		else:
			for name in tqdm(tasks, desc="Processing"):
				self.process(name)

class mgc_extractor(task):
	"""Extracts mel-generalized cepstral (MGC) features from the raw recordings."""

	def register(self):
		subparser=subparsers.add_parser("extract-mgc")
		# argparse's type=bool turns ANY non-empty string (even "False") into
		# True; parse the value explicitly so "--skip False" really disables
		# skipping while "--skip True" keeps working.
		subparser.add_argument("--skip",type=lambda v: v.strip().lower() in ("1","true","t","yes","y"),default=False,help="Skip already extracted data.")
		subparser.add_argument("--cores", type=int, default=1, help="Number of CPU cores to use for parallel extraction.")
		subparser.set_defaults(func=self)

	def process(self,name):
		"""Compute mel-cepstra for one utterance.

		The spectral envelope is estimated with WORLD's CheapTrick and then
		converted to mel-cepstra with the SPTK mcep tool.
		"""
		sample_rate=self.settings["sample_rate"]
		speech=self.load_speech(name)
		if self.settings.get("audio_normalization",True):
			# scale the signal to the target RMS level (dB re full scale 1.0)
			target_db=self.settings.get("volume",-20)
			target=10.0**(target_db/20.0)
			rms=numpy.sqrt(numpy.mean((speech*speech)))
			speech*=target/rms
		f0s=self.load_f0(name)
		# CheapTrick needs an f0 everywhere; fill unvoiced frames with the median
		mf0=numpy.median(f0s[f0s!=0])
		f0s[f0s==0]=mf0
		# NOTE(review): assumes a 5 ms frame period — confirm against FRAMESHIFT
		times=numpy.arange(len(f0s))*0.005
		sp=pyworld.cheaptrick(speech, f0s, times, sample_rate,f0_floor=self.settings["lower_f0"])
		fft_len=(sp.shape[1]-1)*2
		# power spectrum -> amplitude, scaled into the 16-bit range for mcep
		sp=numpy.sqrt(sp)*32768.0
		tmp = str(uuid.uuid4())
		sp.astype(numpy.float32).tofile(tmp)
		try:
			mcep_cmd="{mcep} -a {a} -m {m} -l {l} -e 1.0E-08 -q 3 {tmpfile} > {mgc}".format(mcep=os.path.join(self.settings["bindir"],"mcep"),a=self.aparams["FREQWARP"],m=self.aparams["MGCORDER"],l=fft_len,tmpfile=tmp,mgc=os.path.join(self.mgc_dir,name+".mgc"))
			subprocess.check_call(mcep_cmd,shell=True)
		finally:
			# don't leave the temporary spectrum file behind if mcep fails
			os.remove(tmp)

	def __call__(self,args):
		"""Extract MGC for every pending raw file, optionally in parallel."""
		self.aparams=self.get_analysis_params()
		self.mgc_dir=os.path.join("data","mgc")
		self.raw_dir=os.path.join("data", "raw")
		self.filelist = os.listdir(self.raw_dir)
		if not os.path.isdir(self.mgc_dir):
			os.mkdir(self.mgc_dir)
		tasks = []
		for name in self.filelist:
			base,ext=os.path.splitext(name)
			# with --skip, leave files whose .mgc output already exists alone
			if not (args.skip and os.path.exists(os.path.join(self.mgc_dir,f"{base}.mgc"))):
				tasks.append(base)
		if not tasks:
			print("Extracting MGC: All files are already processed or skip is not enabled.")
			return
		print(f"Extracting MGC using {args.cores} core(s).")
		if args.cores > 1:
			with multiprocessing.Pool(processes=args.cores) as pool:
				list(tqdm(pool.imap_unordered(self.process, tasks), total=len(tasks), desc="Processing"))
		else:
			for name in tqdm(tasks, desc="Processing"):
				self.process(name)


if __name__=="__main__":
	# Register every subcommand, then dispatch to the selected task object.
	task_classes=[initializer,configurator,recordings_importer,labeller,f0_extracter,questions_maker,f0_range_computer,synthesizer,realigner,htk_segmenter,lpf_maker,voice_exporter,bap_extractor,mgc_extractor]
	for task_class in task_classes:
		task_class().register()
	args=parser.parse_args()
	if hasattr(args, 'func'):
		args.func(args)
	else:
		parser.error("Not enough parameters.")
